**************************
***	RUN USING STATA 16 ***
**************************

* Purpose: clean maths objective and subjective datasets
* Last Updated: 06 May 2021

*-----------------------------------------------------------------------------------
* objective items            
*-----------------------------------------------------------------------------------

	* Load the scored objective-item file
	use "$input\mat_o_scored.dta", clear

	* Remove duplicates and anomalies in a single pass: rows flagged by flag_id1,
	* and rows whose form number is above 30 (a missing version also satisfies
	* "> 30" in Stata, so those rows are removed as well)
	drop if flag_id1 == 1 | version > 30

	* Discard the flag variables and the columns not carried into the clean file
	drop flag* scode subject tscore

	* Write out the cleaned objective dataset
	save "$output\mat_o_clean.dta", replace
	
*-----------------------------------------------------------------------------------
* subjective items           
*-----------------------------------------------------------------------------------

	* Load the scored subjective-item file
	use "$input\mat_s_scored.dta", clear

	* Remove rows flagged by flag_id1 and rows with a form number above 30
	* (a missing version also satisfies "> 30" in Stata, so it is removed too)
	drop if flag_id1 == 1 | version > 30

	* Discard flag variables and columns not needed downstream; the existing pid
	* is dropped here so that c can take over that name below
	drop flag* dup TotalMarks total subject scode pid marker
	rename c pid
	
*make item-level adjustments to correct for known errors in item stems, introduced by research team
*  - a part affected by an error is set to missing (.) for the affected form (version)
*  - where a part's marks are capped, the cap is applied to non-missing scores only:
*    Stata orders missing above every number, so a bare "x > cap" would also match an
*    unanswered part and overwrite it with the capped mark

			*2011, item 9, parts b-d (form 4/38 & 16/37)
				foreach p in b c d {
					replace v38`p' = . if version == 4
					replace v37`p' = . if version == 16
				}
			*2012, item 12, parts c-d (form 11/40 & 30/39)
				replace v40b = 4 if version == 11 & v40b > 4 & !missing(v40b)
				replace v39b = 4 if version == 30 & v39b > 4 & !missing(v39b)
				foreach p in c d {
					replace v40`p' = . if version == 11
					replace v39`p' = . if version == 30
				}
			*2012, item 6, part a (form 10/36 & 30/36)
				replace v36a = . if inlist(version, 10, 30)
			*2015, item 11, part c (form 3/40 & 26/39)
				* part a capped at 3 and part b capped at 4; each cap tests its own variable
				replace v40a = 3 if version == 3 & v40a > 3 & !missing(v40a)
				replace v40b = 4 if version == 3 & v40b > 4 & !missing(v40b)
				replace v40c = . if version == 3
				replace v39a = 3 if version == 26 & v39a > 3 & !missing(v39a)
				replace v39b = 4 if version == 26 & v39b > 4 & !missing(v39b)
				replace v39c = . if version == 26
			*2016, item 2, part a (form 10/34 & 27/33)
				replace v34a = . if version == 10
				replace v33a = . if version == 27
			*2018, item 6, part b (form 9/36 & 26/36)
				replace v36b = . if inlist(version, 9, 26)
			*2018, item 8, parts a-c (form 2/37 & 19/37)
				* NOTE(review): the heading says parts a-c but part d is blanked as well;
				* confirm whether the heading or the code is the intended behaviour
				foreach p in a b c d {
					replace v37`p' = . if inlist(version, 2, 19)
				}
			*2018, item 12, part d (form 10/40 & 19/40)
				replace v40d = . if inlist(version, 10, 19)

		// build one question-level score per subjective question (33 to 41):
		// v<q> is the row sum of the variables in the range v<q>a-v<q>d; the
		// "missing" option leaves v<q> missing when every part is missing
		// instead of returning 0
		foreach q of numlist 33/41 {
			egen v`q' = rowtotal(v`q'a-v`q'd), missing
		}
		
****************************************************************************************************************************************************
**************** THIS SECTION DISTINGUISHES ITEMS NOT REACHED (KEPT AS MISSING) FROM ITEMS REACHED BUT NOT ANSWERED (TAKEN AS ANSWERED INCORRECTLY)
****************************************************************************************************************************************************	
	
*** Flag 'Not reached' items with the sentinel 99; these are kept as missing per TIMSS
	* question 41 is not reached when both it and question 40 are blank
	replace v41 = 99 if v41 == . & v40 == .
	* walk backwards from 40 to 34: a blank question is not reached when the question
	* before it is also blank and the question after it was already flagged 99
	* (the order matters, since each step reads the flag set by the previous one)
	forvalues q = 40(-1)34 {
		local prev = `q' - 1
		local next = `q' + 1
		replace v`q' = 99 if v`next' == 99 & v`prev' == . & v`q' == .
	}

*** Not reached (99) goes back to missing; reached but unanswered (.) is scored 0
	foreach q of numlist 33/41 {
		recode v`q' (99 = .) (. = 0)
	}

*** v33 is never flagged 99 above, so reset it to missing when v34-v41 are all missing
	egen miss = rowtotal(v34-v41), missing
	replace v33 = . if missing(miss)
	drop miss

	* write out the cleaned subjective dataset
	save "$output\mat_s_clean.dta", replace
		
		
		
		
		
		
		
		
		
		
